#!/usr/bin/python3
# -*- coding: utf-8 -*-

import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import LeaveOneOut
import sklearn.naive_bayes
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from numpy import loadtxt

from xgboost import XGBClassifier
import xgboost
import lightgbm
import catboost

from datetime import datetime
import pandas as pd
import joblib
import os
import sys
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import pickle


from pandas import read_csv

# XGBoost decision-tree model
def xgb_train(training_data, training_label, testing_data, testing_label):
    """Fit an XGBoost classifier, save it to disk and score it on the test split.

    The fitted model is written with joblib to
    ``<directory of this file>/model/<dataname>_<xgbmethod>.model``, where
    ``dataname`` and ``xgbmethod`` are module-level globals.

    Args:
        training_data: Feature matrix used to fit the classifier.
        training_label: Class labels matching ``training_data``.
        testing_data: Feature matrix used for evaluation.
        testing_label: True class labels matching ``testing_data``.

    Returns:
        A 5-tuple ``(confusion_matrix, accuracy, precision, f1, recall)``.
        Precision, F1 and recall use ``average='weighted'``.
    """
    clf = XGBClassifier(use_label_encoder=False)

    fit_begin = datetime.now()
    clf.fit(training_data, training_label)
    fit_end = datetime.now()
    print("time = ", fit_end - fit_begin)

    # Save the model. os.path.join keeps the path valid on every platform
    # (hard-coded backslashes only work on Windows), and the directory is
    # created first so a missing "model" folder does not make the dump fail.
    model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model')
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, dataname + '_' + xgbmethod + ".model")
    joblib.dump(clf, model_path)

    predict_begin = datetime.now()
    y_pred = clf.predict(testing_data)
    predict_end = datetime.now()

    print('xgby_test:', testing_label)
    print('xgby_pred:', y_pred)
    # This line is labelled "prediction time", so report the duration of
    # predict() itself rather than repeating the fit duration.
    print('预测时间:', predict_end - predict_begin)

    return confusion_matrix(testing_label, y_pred), accuracy_score(testing_label, y_pred), \
           precision_score(testing_label, y_pred, average='weighted'), \
           f1_score(testing_label, y_pred, average='weighted'), \
           recall_score(testing_label, y_pred, average='weighted')
# LightGBM decision-tree model
def lgb_train(training_data, training_label, testing_data, testing_label):
    """Fit a LightGBM classifier, save it to disk and score it on the test split.

    The fitted model is written with joblib to
    ``<directory of this file>/model/<dataname>_<lgbmethod>.model``, where
    ``dataname`` and ``lgbmethod`` are module-level globals.

    Args:
        training_data: Feature matrix used to fit the classifier.
        training_label: Class labels matching ``training_data``.
        testing_data: Feature matrix used for evaluation.
        testing_label: True class labels matching ``testing_data``.

    Returns:
        A 5-tuple ``(confusion_matrix, accuracy, precision, f1, recall)``.
        Precision, F1 and recall use ``average='weighted'``.
    """
    clf = lightgbm.LGBMClassifier()

    fit_begin = datetime.now()
    clf.fit(training_data, training_label)
    fit_end = datetime.now()
    print("lgmtime = ", fit_end - fit_begin)

    # Save the model. os.path.join keeps the path valid on every platform
    # (hard-coded backslashes only work on Windows), and the directory is
    # created first so a missing "model" folder does not make the dump fail.
    model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model')
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, dataname + '_' + lgbmethod + ".model")
    joblib.dump(clf, model_path)

    y_pred = clf.predict(testing_data)
    print('lgby_test:', testing_label)
    print('lgby_pred:', y_pred)

    return confusion_matrix(testing_label, y_pred), accuracy_score(testing_label, y_pred), \
           precision_score(testing_label, y_pred, average='weighted'), \
           f1_score(testing_label, y_pred, average='weighted'), \
           recall_score(testing_label, y_pred, average='weighted')

# CatBoost decision-tree model
def cat_train(training_data, training_label, testing_data, testing_label):
    """Fit a CatBoost classifier, save it to disk and score it on the test split.

    The fitted model is written with joblib to
    ``<directory of this file>/model/<dataname>_<catmethod>.model``, where
    ``dataname`` and ``catmethod`` are module-level globals.

    Args:
        training_data: Feature matrix used to fit the classifier.
        training_label: Class labels matching ``training_data``.
        testing_data: Feature matrix used for evaluation.
        testing_label: True class labels matching ``testing_data``.

    Returns:
        A 5-tuple ``(confusion_matrix, accuracy, precision, f1, recall)``.
        Precision, F1 and recall use ``average='weighted'``.
    """
    clf = catboost.CatBoostClassifier()

    fit_begin = datetime.now()
    clf.fit(training_data, training_label)
    fit_end = datetime.now()
    print("cattime = ", fit_end - fit_begin)

    # Save the model. os.path.join keeps the path valid on every platform
    # (hard-coded backslashes only work on Windows), and the directory is
    # created first so a missing "model" folder does not make the dump fail.
    model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'model')
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, dataname + '_' + catmethod + ".model")
    joblib.dump(clf, model_path)

    y_pred = clf.predict(testing_data)
    print('caty_test:', testing_label)
    print('caty_pred:', y_pred)

    return confusion_matrix(testing_label, y_pred), accuracy_score(testing_label, y_pred), \
           precision_score(testing_label, y_pred, average='weighted'), \
           f1_score(testing_label, y_pred, average='weighted'), \
           recall_score(testing_label, y_pred, average='weighted')
# Display the results
def display_results(results, accuracy, precision, F1score, recall):
    """Print the confusion matrix followed by the four scalar metrics.

    Args:
        results: Confusion matrix (printed as-is).
        accuracy: Accuracy score.
        precision: Precision score.
        F1score: F1 score.
        recall: Recall score.

    Each metric is rounded to six decimal places before printing.
    """
    print(results)
    captioned_scores = (
        ('accuracy: ', accuracy),
        ('precision: ', precision),
        ('f1-score: ', F1score),
        ('recall: ', recall),
    )
    for caption, score in captioned_scores:
        print(caption, np.round(score, 6))

# def loadmodel_and_evaluate(x_test, y_test):
#         current_path = os.path.dirname(os.path.abspath(__file__))
#         model_path = current_path + '\\model\\' + dataset + '_' + method + ".model"
#         clf = joblib.load(model_path)
#         print(model_path + " loaded successfully")
#         y_pred = clf.predict(x_test)
#
#         return confusion_matrix(y_test, y_pred), accuracy_score(y_test, y_pred), \
#                precision_score(y_test, y_pred), f1_score(y_test, y_pred), \
#                recall_score(y_test, y_pred)


# Label encoding (class name -> integer label):
#   nondoh  -> 0
#   benign  -> 1
#   dns2tcp -> 2
#   dnscat2 -> 3
#   iodine  -> 4

# Prefix of every saved model file name ("<dataname>_<method>.model").
dataname = "all"
# Per-model suffixes of the saved model file names.
xgbmethod = "method1.1"  # used by xgb_train
lgbmethod = "method1.2"  # used by lgb_train
catmethod = "method1.3"  # used by cat_train


if __name__ == '__main__':
    # Script entry point: load 'all.csv', then train and evaluate three models
    # in sequence, each on a progressively smaller subset of the labels.

    # Positional layout of the CSV: columns 5..33 are the features and
    # column 34 is the label (presumably the same column as 'Label' - confirm).
    FEATURE_COLUMNS = slice(5, 34)
    LABEL_COLUMN = 34
    TEST_SIZE = 0.3
    RANDOM_STATE = 7

    time_begin = datetime.now()

    print("training......")

    # Load the data set and drop rows whose Label is the string "FALSE".
    df_dataset = pd.read_csv('all.csv', delimiter=',', low_memory=False)
    df_dataset = df_dataset[~df_dataset['Label'].isin(["FALSE"])]

    # Each stage first removes one more label value (cumulatively) and then
    # trains one model on the remaining rows:
    #   1. XGBoost  on all labels,
    #   2. CatBoost on everything except label "0",
    #   3. LightGBM on everything except labels "0" and "1".
    # NOTE(review): the exclusions compare against *strings*; this presumably
    # relies on Label being read as text because of the "FALSE" rows - confirm.
    stages = (
        (None, xgb_train),
        ("0", cat_train),
        ("1", lgb_train),
    )

    for excluded_label, train_model in stages:
        if excluded_label is not None:
            df_dataset = df_dataset[~df_dataset['Label'].isin([excluded_label])]

        # Split the table into features and labels. The features are passed
        # to the models as-is; no scaling is applied.
        values = df_dataset.values
        data = values[:, FEATURE_COLUMNS]
        label = np.array(values[:, LABEL_COLUMN]).astype('int32')

        # Fixed random_state keeps the train/test split reproducible.
        training_data, testing_data, training_label, test_label = train_test_split(
            data, label, test_size=TEST_SIZE, random_state=RANDOM_STATE)

        # Train, save and evaluate the model for this stage.
        results, a, b, c, d = train_model(training_data, training_label, testing_data, test_label)
        display_results(results, a, b, c, d)

        # Elapsed time is cumulative since the start of the script.
        time_end = datetime.now()
        print('time spend: ', time_end - time_begin)
